## MPs for Sale Replication replication
##
## Conservative MPs
## Estimand: ATT 
## Dependent variable: Income 
##
## Reproduce: 
##      - Table 2 (top panel) 
##      - Figure 3 (TASMD & KS statistic)
##      - Figure 4 (Q-Q Plot)
##
## Kevin Quinn and Guoer Liu
## 11/19/2024


# setwd("") # set at your current working directory
set.seed(91800198)


source("func.R")
library(haven)
library(WeightIt)
library(Matching)
#library(rgenoud) new package
library(cem)
library(data.table)
library(RColorBrewer)
library(lattice)



mydata <- read_dta("mpsforsale.dta")
mydata <- as.data.frame(mydata)

## subset to just Conservative MPs
mydata <- mydata[mydata$tory==1, ]

## remove covariates due to multicolinearity
cov.names.all <- names(mydata)[grep("xx", names(mydata))]
cov.form <- as.formula(paste("lnrealgross", '~', 
                             paste(cov.names.all, collapse = "+")))
mod <- lm(cov.form, mydata)
cov.names.sub <- names(mod$coefficients)[!is.na(mod$coefficients)]
cov.names.sub <- cov.names.sub[-1] # remove intercept


mydata.all <- mydata[, c("sname", "fname", "labour", "tory", cov.names.all,
                     "treated", "lnrealgross")]
mydata.all <- na.omit(mydata.all)

mydata.sub <- mydata[, c("sname", "fname", "labour", "tory", cov.names.sub,
                     "treated", "lnrealgross")]
mydata.sub <- na.omit(mydata.sub)

## NOTE that there are 222 observations after listwise deletion
##      Catherine Morton (row 386 of the original data) is missing year of birth


## formula
treat.form.cov.sub <- as.formula(paste('treated', "~",
			paste(cov.names.sub, collapse = "+"))) # model treatment
treat.form.cov.all <- as.formula(paste('treated', "~",
			paste(cov.names.all, collapse = "+"))) # model treatment
out.form.cov.sub <- as.formula(paste("lnrealgross", '~', "treated", "+",
			paste(cov.names.sub, collapse = "+"))) # model outcome




estimator <- NULL
estimate <- NULL
SE <- NULL
Eff.n <- NULL
Eff.n.ratio <- NULL
DFBETA.min <- NULL
DFBETA.max <- NULL
DFBETA.min.who <- NULL
DFBETA.max.who <- NULL
extrap.treat <- NULL
extrap.control <- NULL





## #############################################################
## general regression
estimator <- c(estimator, "reg.general")
mydata.1 <- mydata.sub[mydata.sub$treated==1,]
mydata.0 <- mydata.sub[mydata.sub$treated==0,]

reg.formula <- as.formula(paste('lnrealgross', "~",
                                paste(cov.names.sub, collapse = "+"))) 
lm.out.0 <- lm(reg.formula, data=mydata.0)

m0 <- predict(lm.out.0, newdata=mydata.1)

estimate <- c(estimate, mean(mydata.1$lnrealgross - m0))


## SE via nonparametric bootstrap
B <- 5000
n <- nrow(mydata.sub)
estim.bs <- rep(NA, B)
for (b in 1:B){
    bs.inds <- sample(1:n, n, replace=TRUE)
    mydata.bs <- mydata.sub[bs.inds,]

    mydata.bs.1 <- mydata.bs[mydata.bs$treated==1,]
    mydata.bs.0 <- mydata.bs[mydata.bs$treated==0,]

    lm.out.0 <- lm(reg.formula, data=mydata.bs.0)
    
    m0 <- predict(lm.out.0, newdata=mydata.bs.1)

    estim.bs[b] <- mean(mydata.bs.1$lnrealgross - m0)
}
SE <- c(SE, sd(estim.bs))


## get the WATE weights
X1 <- as.matrix(mydata.sub[mydata.sub$treated==1, cov.names.sub])
X1 <- cbind(1, X1)
X0 <- as.matrix(mydata.sub[mydata.sub$treated==0, cov.names.sub])
X0 <- cbind(1, X0)
n1 <- nrow(X1)
n0 <- nrow(X0)
n <- n1 + n0
treat.vec <- c(rep(1, n1), rep(0, n0))
y1 <- mydata.sub[mydata.sub$treated==1, "lnrealgross"]
y0 <- mydata.sub[mydata.sub$treated==0, "lnrealgross"]
X <- rbind(X1, X0)
y <- as.matrix(c(y1, y0), nrow=n, ncol=1)
ones.n <- matrix(1, nrow=n1, ncol=1)

w1 <- rep(1, n1)
w0 <- t(ones.n) %*% X1 %*% solve(t(X0) %*% X0) %*% t(X0)
w <- as.vector(c(w1, w0))

n.eff.0 <- (sum(w0)^2) / sum(w0^2)
n.eff.1 <- (sum(w1)^2) / sum(w1^2)
n.eff <- (n.eff.1 * n.eff.0) / (n.eff.1 + n.eff.0)
n.eff.max <- (n1 * n0) / (n1 + n0)
Eff.n <- c(Eff.n, n.eff)
Eff.n.ratio <- c(Eff.n.ratio, n.eff/n.eff.max)

DFBETA.out <- DFBETA.weighted(y, treat.vec, w)
DFBETA.min <- c(DFBETA.min, min(DFBETA.out))
DFBETA.max <- c(DFBETA.max, max(DFBETA.out))

mydata.reg.att <- rbind(mydata.1, mydata.0)
mydata.reg.att$w <- w
mydata.reg.att$DFBETA.out <- DFBETA.out

min.ind <- which.min(DFBETA.out)
max.ind <- which.max(DFBETA.out)

DFBETA.min.who <- c(DFBETA.min.who, paste(mydata.reg.att[min.ind, "fname"],
                                          mydata.reg.att[min.ind, "sname"]))

DFBETA.max.who <- c(DFBETA.max.who, paste(mydata.reg.att[max.ind, "fname"],
                                          mydata.reg.att[max.ind, "sname"]))

extrap.1 <- sum(abs(w1) * (w1 < 0)) / sum(w1 * (w1 >= 0))
extrap.0 <- sum(abs(w0) * (w0 < 0)) / sum(w0 * (w0 >= 0))

extrap.treat <- c(extrap.treat, extrap.1)
extrap.control <- c(extrap.control, extrap.0)


## END general regression (two regression models)
## #############################################################










## #############################################################
## general regression (constant effect)
estimator <- c(estimator, "reg.constant")

lm.out.cons <- lm(out.form.cov.sub, data=mydata.sub)

estimate <- c(estimate, summary(lm.out.cons)$coefficient[2, 1])

## SE via nonparametric bootstrap
B <- 5000
n <- nrow(mydata.sub)
estim.bs <- rep(NA, B)
for (b in 1:B){
    bs.inds <- sample(1:n, n, replace=TRUE)
    mydata.bs <- mydata.sub[bs.inds,]

    lm.out.cons <- lm(out.form.cov.sub, data=mydata.bs)

    estim.bs[b] <- summary(lm.out.cons)$coefficient[2, 1]
}
SE <- c(SE, sd(estim.bs))


## get the WATE weights
treatment <- mydata.sub$treated
y <- mydata.sub$lnrealgross

n <- length(treatment)
n1 <- sum(treatment == 1)
n0 <- sum(treatment == 0)

X <- as.matrix(mydata.sub[, cov.names.sub])
X.good.cols <- qr(X)$pivot[seq_len(qr(X)$rank)]
ordvec <- order(treatment)
treat <- treatment[ordvec]
X.cov <- X[ordvec, X.good.cols]
X <- cbind(1, treat, X.cov)
X0 <- cbind(1, 0, X.cov)
X1 <- cbind(1, 1, X.cov)

ones <- matrix(1, nrow = n, ncol = 1)
    
w1.full <- t(ones) %*% X1 %*% solve(crossprod(X)) %*% t(X)
w0.full <- t(ones) %*% X0 %*% solve(crossprod(X)) %*% t(X)
w1 <- w1.full[(n0+1):n] - w0.full[(n0+1):n]
w0 <- w0.full[1:n0] - w1.full[1:n0]
w.ordvec <- c(w0, w1) # stacked w
w <- w.ordvec[order(ordvec)] # w in the original order

w1 <- w[treatment == 1]
w0 <- w[treatment == 0]

n.eff.0 <- (sum(w0)^2) / sum(w0^2)
n.eff.1 <- (sum(w1)^2) / sum(w1^2)
n.eff <- (n.eff.1 * n.eff.0) / (n.eff.1 + n.eff.0)
n.eff.max <- (n1 * n0) / (n1 + n0)
Eff.n <- c(Eff.n, n.eff)
Eff.n.ratio <- c(Eff.n.ratio, n.eff/n.eff.max)

DFBETA.out <- DFBETA.weighted(y, treatment, w)
DFBETA.min <- c(DFBETA.min, min(DFBETA.out))
DFBETA.max <- c(DFBETA.max, max(DFBETA.out))

min.ind <- which.min(DFBETA.out)
max.ind <- which.max(DFBETA.out)

mydata.reg_cons.att <- cbind(mydata.sub, w)
mydata.reg_cons.att$DFBETA.out <- DFBETA.out


DFBETA.min.who <- c(DFBETA.min.who, paste(mydata.reg_cons.att[min.ind, "fname"],
                                          mydata.reg_cons.att[min.ind, "sname"]))

DFBETA.max.who <- c(DFBETA.max.who, paste(mydata.reg_cons.att[max.ind, "fname"],
                                          mydata.reg_cons.att[max.ind, "sname"]))

extrap.1 <- sum(abs(w1) * (w1 < 0)) / sum(w1 * (w1 >= 0))
extrap.0 <- sum(abs(w0) * (w0 < 0)) / sum(w0 * (w0 >= 0))

extrap.treat <- c(extrap.treat, extrap.1)
extrap.control <- c(extrap.control, extrap.0)


## END general regression (constant effect)
## #############################################################












## #############################################################
## propensity score weighting (logit)
estimator <- c(estimator, "sipw.logit")

glm.out <- glm(treat.form.cov.sub, data=mydata.sub, family=binomial(logit))
mydata.sub$pscores <- glm.out$fitted
mydata.sub$w <- 1 
mydata.sub$w[mydata.sub$treated==0] <-
    mydata.sub$pscores[mydata.sub$treated==0] /
    (1 - mydata.sub$pscores[mydata.sub$treated==0])

ATT.hat <- sum(mydata.sub$w * mydata.sub$treated * mydata.sub$lnrealgross) /
    sum(mydata.sub$w * mydata.sub$treated) -
    sum(mydata.sub$w * (1-mydata.sub$treated) * mydata.sub$lnrealgross) /
    sum(mydata.sub$w * (1-mydata.sub$treated))

estimate <- c(estimate, ATT.hat)


## SE via nonparametric bootstrap
B <- 5000
n <- nrow(mydata.sub)
estim.bs <- rep(NA, B)
for (b in 1:B){
    bs.inds <- sample(1:n, n, replace=TRUE)
    mydata.bs <- mydata.sub[bs.inds,]

    glm.out.bs <- glm(treat.form.cov.sub, data=mydata.bs,
                      family=binomial(logit))
    mydata.bs$pscores <- glm.out.bs$fitted
    mydata.bs$w <- 1 
    mydata.bs$w[mydata.bs$treated==0] <- mydata.bs$pscores[mydata.bs$treated==0] / (1 - mydata.bs$pscores[mydata.bs$treated==0]) 
     
    ATT.hat.bs <- sum(mydata.bs$w * mydata.bs$treated * mydata.bs$lnrealgross) /
        sum(mydata.bs$w * mydata.bs$treated) -
        sum(mydata.bs$w * (1-mydata.bs$treated) * mydata.bs$lnrealgross) /
        sum(mydata.bs$w * (1-mydata.bs$treated))
    
    
    estim.bs[b] <- ATT.hat.bs
}
SE <- c(SE, sd(estim.bs))


w0 <- mydata.sub$w[mydata.sub$treated==0]
w1 <- mydata.sub$w[mydata.sub$treated==1]
n1 <- sum(mydata.sub$treated)
n <- nrow(mydata.sub)
n0 <- n - n1

n.eff.0 <- (sum(w0)^2) / sum(w0^2)
n.eff.1 <- (sum(w1)^2) / sum(w1^2)
n.eff <- (n.eff.1 * n.eff.0) / (n.eff.1 + n.eff.0)
n.eff.max <- (n1 * n0) / (n1 + n0)
Eff.n <- c(Eff.n, n.eff)
Eff.n.ratio <- c(Eff.n.ratio, n.eff/n.eff.max)

DFBETA.out <- DFBETA.weighted(mydata.sub$lnrealgross, mydata.sub$treated,
                              mydata.sub$w)
DFBETA.min <- c(DFBETA.min, min(DFBETA.out))
DFBETA.max <- c(DFBETA.max, max(DFBETA.out))

min.ind <- which.min(DFBETA.out)
max.ind <- which.max(DFBETA.out)
DFBETA.min.who <- c(DFBETA.min.who, paste(mydata.sub[min.ind, "fname"],
                                          mydata.sub[min.ind, "sname"]))

DFBETA.max.who <- c(DFBETA.max.who, paste(mydata.sub[max.ind, "fname"],
                                          mydata.sub[max.ind, "sname"]))


extrap.treat <- c(extrap.treat, NA)
extrap.control <- c(extrap.control, NA)


mydata.sipw.logit.att <- mydata.sub
mydata.sipw.logit.att$DFBETA.out <- DFBETA.out



## END propensity score weighting (logit)
## #############################################################









## #############################################################
## entropy balancing
estimator <- c(estimator, "entropy.balancing")

eb.out <- weightit(treat.form.cov.all, data=mydata.all, method="ebal",
                   estimand="ATT")
mydata.all$w <- eb.out$weights

wls.out <- lm(lnrealgross ~ treated, weights=w, data=mydata.all)

ATT.hat <- coef(wls.out)[2]

estimate <- c(estimate, ATT.hat)
SE <- c(SE, sqrt(vcov(wls.out)[2,2]))


w0 <- mydata.all$w[mydata.all$treated==0]
w1 <- mydata.all$w[mydata.all$treated==1]
n1 <- sum(mydata.all$treated)
n <- nrow(mydata.all)
n0 <- n - n1

n.eff.0 <- (sum(w0)^2) / sum(w0^2)
n.eff.1 <- (sum(w1)^2) / sum(w1^2)
n.eff <- (n.eff.1 * n.eff.0) / (n.eff.1 + n.eff.0)
n.eff.max <- (n1 * n0) / (n1 + n0)
Eff.n <- c(Eff.n, n.eff)
Eff.n.ratio <- c(Eff.n.ratio, n.eff/n.eff.max)

DFBETA.out <- DFBETA.weighted(mydata.all$lnrealgross, mydata.all$treated,
                              mydata.all$w)
DFBETA.min <- c(DFBETA.min, min(DFBETA.out))
DFBETA.max <- c(DFBETA.max, max(DFBETA.out))

min.ind <- which.min(DFBETA.out)
max.ind <- which.max(DFBETA.out)
DFBETA.min.who <- c(DFBETA.min.who, paste(mydata.all[min.ind, "fname"],
                                          mydata.all[min.ind, "sname"]))

DFBETA.max.who <- c(DFBETA.max.who, paste(mydata.all[max.ind, "fname"],
                                          mydata.all[max.ind, "sname"]))


extrap.treat <- c(extrap.treat, NA)
extrap.control <- c(extrap.control, NA)

mydata.entropy.balancing.att <- mydata.all
mydata.entropy.balancing.att$DFBETA.out <- DFBETA.out



## END entropy balancing
## #############################################################




## #############################################################
## genetic matching
estimator <- c(estimator, "genetic.matching")

treat.vec <- mydata.all$treated
X <- as.matrix(mydata.all[, cov.names.all])
y <- mydata.all$lnrealgross

GenMatch.out <- GenMatch(Tr = treat.vec,
                         X = X,
                         estimand = "ATT",
                         M = 1, ties=FALSE,
                         replace = TRUE,
                         pop.size = 50000)

match.out <- Match(Y = y, Tr=treat.vec, X=X, estimand="ATT",
                   M=1, ties=FALSE, replace=TRUE, BiasAdjust=FALSE,
                   Weight.matrix=GenMatch.out$Weight.matrix)

estimate <- c(estimate, match.out$est)
SE <- c(SE, match.out$se.standard)

mydata.all$w <- 0
for (i in 1:nrow(mydata.all)){
     mydata.all$w[i] <- sum(match.out$index.control == i) +
            sum(match.out$index.treated == i)
}

w0 <- mydata.all$w[mydata.all$treated==0]
w1 <- mydata.all$w[mydata.all$treated==1]
n1 <- sum(mydata.all$treated)
n <- nrow(mydata.all)
n0 <- n - n1

n.eff.0 <- (sum(w0)^2) / sum(w0^2)
n.eff.1 <- (sum(w1)^2) / sum(w1^2)
n.eff <- (n.eff.1 * n.eff.0) / (n.eff.1 + n.eff.0)
n.eff.max <- (n1 * n0) / (n1 + n0)
Eff.n <- c(Eff.n, n.eff)
Eff.n.ratio <- c(Eff.n.ratio, n.eff/n.eff.max)

DFBETA.out <- DFBETA.weighted(mydata.all$lnrealgross, mydata.all$treated,
                              mydata.all$w)
DFBETA.min <- c(DFBETA.min, min(DFBETA.out))
DFBETA.max <- c(DFBETA.max, max(DFBETA.out))

min.ind <- which.min(DFBETA.out)
max.ind <- which.max(DFBETA.out)
DFBETA.min.who <- c(DFBETA.min.who, paste(mydata.all[min.ind, "fname"],
                                          mydata.all[min.ind, "sname"]))

DFBETA.max.who <- c(DFBETA.max.who, paste(mydata.all[max.ind, "fname"],
                                          mydata.all[max.ind, "sname"]))


extrap.treat <- c(extrap.treat, NA)
extrap.control <- c(extrap.control, NA)

mydata.genMatching.att <- mydata.all
mydata.genMatching.att$DFBETA.out <- DFBETA.out




## END genetic matching
## #############################################################





## #############################################################
## coarsened exact matching
estimator <- c(estimator, "CEM")

cem.out <- cem(treatment="treated", data=mydata.all,
               drop=c("sname", "fname", "labour", "tory", "treated",
                      "lnrealgross", "pscores", "w"))
att.out <- att(cem.out, formula = lnrealgross ~ treated, data=mydata.all)
s <- summary(att.out)


estimate <- c(estimate, s[2,1])
SE <- c(SE, s[2,2])

mydata.all$w <- cem.out$w

w0 <- mydata.all$w[mydata.all$treated==0]
w1 <- mydata.all$w[mydata.all$treated==1]
n1 <- sum(mydata.all$treated)
n <- nrow(mydata.all)
n0 <- n - n1

n.eff.0 <- (sum(w0)^2) / sum(w0^2)
n.eff.1 <- (sum(w1)^2) / sum(w1^2)
n.eff <- (n.eff.1 * n.eff.0) / (n.eff.1 + n.eff.0)
n.eff.max <- (n1 * n0) / (n1 + n0)
Eff.n <- c(Eff.n, n.eff)
Eff.n.ratio <- c(Eff.n.ratio, n.eff/n.eff.max)

DFBETA.out <- DFBETA.weighted(mydata.all$lnrealgross, mydata.all$treated,
                              mydata.all$w)
DFBETA.min <- c(DFBETA.min, min(DFBETA.out))
DFBETA.max <- c(DFBETA.max, max(DFBETA.out))

min.ind <- which.min(DFBETA.out)
max.ind <- which.max(DFBETA.out)
DFBETA.min.who <- c(DFBETA.min.who, paste(mydata.all[min.ind, "fname"],
                                          mydata.all[min.ind, "sname"]))

DFBETA.max.who <- c(DFBETA.max.who, paste(mydata.all[max.ind, "fname"],
                                          mydata.all[max.ind, "sname"]))


extrap.treat <- c(extrap.treat, NA)
extrap.control <- c(extrap.control, NA)

mydata.CEM.att <- mydata.all
mydata.CEM.att$DFBETA.out <- DFBETA.out




## END coarsened exact matching
## #############################################################




## #############################################################
## TABLE

### Table 2 (top panel) ###
out.data.att <- data.frame(estimator=estimator,
                       estimate=estimate,
                       SE=SE,
                       #Z = estimate / SE,
                       Eff.n=Eff.n,
                       Eff.n.ratio=Eff.n.ratio,
                       DFBETA.min=DFBETA.min,
                       DFBETA.min.who=DFBETA.min.who,
                       DFBETA.max=DFBETA.max,
                       DFBETA.max.who=DFBETA.max.who,
                       extrap.control=extrap.control,
                       extrap.treat=extrap.treat)

print(out.data.att)

#library(xtable)
#xtable(out.data.att, digits=c(0, 0, rep(3, 2), 1, rep(3, 2), 0, 3, 0, rep(3, 2)))

## #############################################################
## FIGURES 

### Figure 3: TASMD (left panel) ###

mydata.raw <- mydata.sub 
mydata.raw$w <- 1

data.list <- list(mydata.raw,
                  mydata.reg.att,
                  mydata.reg_cons.att,
                  mydata.sipw.logit.att,
                  mydata.entropy.balancing.att,
                  mydata.genMatching.att,
                  mydata.CEM.att)

TASMD.data <- as.data.frame(matrix(nrow = length(cov.names.sub), 
                                   ncol = length(data.list)))

colnames.var <- c("Raw Data",
                  "Regression (MRI)",
                  "Regression (URI)",
                  "PScores",
                  "Ebal",
                  "GenMatch", 
                  "CEM")
colnames(TASMD.data) <- colnames.var
rownames(TASMD.data) <- cov.names.sub

for(i in 1:length(data.list)){
    mydata <- data.list[[i]]
    for (xvar in cov.names.sub){
        tasmd <- TASMD(x=mydata[,xvar], w=mydata$w,
                       treatment=mydata$treated)
        TASMD.data[xvar, i] <- tasmd
    }
}

xvar.names <- c("Birth Year", "Death Year", 
                "Schooling: Public", "Schooling: Eton", 
                "Schooling: Regular",
                "University: Oxbridge", "University: Degree",
                "Aristocrat", "Female",
                "Teacher", "Barrister", "Solicitor",
                "Doctor", "Civil Servant", "Local Politician",
                "Business", "White Collar", "Journalist")


TASMD.data$xvar <- xvar.names

TASMD.long <- melt(setDT(TASMD.data),
                    id.vars="xvar", variable.name = "method")


colors <- brewer.pal(7, "Dark2")
colors <- replace(colors, c(1, 2), colors[c(2, 1)])

#pdf("TASMD-ConservativeMPs-att.pdf", height=7, width=8, family = "Times")
trell.par <- trellis.par.get() ## get default settings
trellis.par.set(superpose.symbol =
                    list(pch = c(16, 0:3, 18, 4), cex = 1,
                         col = colors                                    
                         ))
TASMD.long$xvar <- factor(TASMD.long$xvar, levels = unique(TASMD.long$xvar))
print(dotplot(xvar ~ value, data = TASMD.long,
              groups = method,
              xlab = "TASMD",
              scales = list(y = list(cex = 1.2), x = list(cex = 1.2)),
              panel = function(x, y, ...) {
                  panel.abline(h = 1:length(unique(TASMD.long$xvar)), col = "grey95", ...)
                  panel.abline(v = 0.1, col = "grey85")
                  panel.xyplot(x, y, ...)
              },
              key = simpleKey(levels(TASMD.long$method),
                              space = "right")
))
trellis.par.set(trell.par)  ## go back to default
#dev.off()



### Figure 3: KS statistics (right panel) ###

KS.data <- as.data.frame(matrix(nrow = length(cov.names.sub), 
                                ncol = length(data.list)))

colnames(KS.data) <- colnames.var
rownames(KS.data) <- cov.names.sub

for(i in 1:length(data.list)){
    mydata <- data.list[[i]]
    for (xvar in cov.names.sub){
        KS <- wtd.ks(x=mydata[,xvar], w=mydata$w,
                     treatment=mydata$treated)
        KS.data[xvar, i] <- KS
    }
}

KS.data$xvar <- xvar.names

KS.long <- melt(setDT(KS.data),
                    id.vars="xvar", variable.name = "method")

colors <- brewer.pal(7, "Dark2")
colors <- replace(colors, c(1, 2), colors[c(2, 1)])

#pdf("KS-ConservativeMPs-att.pdf", height=7, width=8, family = "Times")
trell.par <- trellis.par.get() ## get default settings
trellis.par.set(superpose.symbol =
                    list(pch = c(16, 0:3, 18, 4), cex = 1,
                         col = colors                                    
                         ))
KS.long$xvar <- factor(KS.long$xvar, levels = unique(KS.long$xvar))
print(dotplot(xvar ~ value, data = KS.long,
              groups = method,
              xlab = "KS Statistics",
              scales = list(y = list(cex = 1.2), x = list(cex = 1.2)),
              panel = function(x, y, ...) {
                  panel.abline(h = 1:length(unique(KS.long$xvar)), col = "grey95", ...)
                  panel.abline(v = 0, col = "grey85")
                  panel.xyplot(x, y, ...)
              },
              key = simpleKey(levels(KS.long$method),
                              space = "right")
))
trellis.par.set(trell.par)  ## go back to default
#dev.off()


## Figure 4: Q-Q plot (right) <birth year>

# Save the current plotting parameters
old_par <- par(no.readonly = TRUE)

# Open a PDF device
#pdf("wtdQQ-yob-mps-att.pdf", height=7, width=6, family = "Times")

# Adjust the plotting area to accommodate the legend
par(mfrow=c(1, 1), mar=c(8, 4, 4, 2) + 0.1)

# First QQ plot
wtd.qqplot(x = data.list[[1]][, "xxyob"], weights = data.list[[1]]$w,
           treatment = data.list[[1]]$treated,
           points.cex = 1.2, points.pch = 16,
           points.col = "#BF2C23", points.alpha = 0.9,
           x0lab = "Control",
           x1lab = "Treated")

wtd.qqplot(x = data.list[[6]][, "xxyob"], weights = data.list[[6]]$w,
           treatment = data.list[[6]]$treated, add = TRUE,
           points.cex = 1.2, points.pch = 4, points.lwd = 1.5,
           points.col = "#2F67B1", points.alpha = 1,
           x0lab = NULL,
           x1lab = NULL)

# Add the legend outside the plot area at the bottom
par(xpd=TRUE)
legend("bottom", 
       legend = c("Raw Data", "GenMatch"),
       col = c("#BF2C23", "#2F67B1"),
       pch = c(16, 4), pt.cex = 2, horiz = TRUE, inset = -0.3, xpd = NA)
# Close the PDF device
#dev.off()



## Figure 4: Q-Q plot (right) <death year>

# Open a PDF device
#pdf("wtdQQ-yod-mps-att.pdf", height=7, width=6, family = "Times")

# Adjust the plotting area to accommodate the legend
par(mfrow=c(1, 1), mar=c(8, 4, 4, 2) + 0.1)

# First QQ plot
wtd.qqplot(x = data.list[[1]][, "xxyod"], weights = data.list[[1]]$w,
           treatment = data.list[[1]]$treated,
           points.cex = 1.2, points.pch = 16,
           points.col = "#BF2C23", points.alpha = 0.9,
           x0lab = "Control",
           x1lab = "Treated")

wtd.qqplot(x = data.list[[6]][, "xxyod"], weights = data.list[[6]]$w,
           treatment = data.list[[6]]$treated, add = TRUE,
           points.cex = 1.2, points.pch = 4, points.lwd = 1.5,
           points.col = "#2F67B1", points.alpha = 1,
           x0lab = NULL,
           x1lab = NULL)

# Add the legend outside the plot area at the bottom
par(xpd=TRUE)
legend("bottom", 
       legend = c("Raw Data", "GenMatch"),
       col = c("#BF2C23", "#2F67B1"),
       pch = c(16, 4), pt.cex = 2, 
       horiz = TRUE, inset = -0.3, xpd = NA)

# Close the PDF device
#dev.off()


# Reset plotting parameters
par(old_par)














